/*
 * Copyright 2018, Oath Inc
 * Licensed under the terms of the Apache License 2.0. Please refer to accompanying LICENSE file for terms.
 */

// This code is a derivative work heavily modified from the OHC project. See NOTICE file for copyright and license.

package com.oath.halodb.histo;

import com.google.common.base.Objects;

import org.slf4j.Logger;

import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLongArray;

public class EstimatedHistogram {

    /**
     * The series of values to which the counts in `buckets` correspond: 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20,
     * etc. Thus, a `buckets` of [0, 0, 1, 10] would mean we had seen one value of 3 and 10 values of 4.
     *
     * The series starts at 1 and grows by 1.2 each time (rounding and removing duplicates). It goes from 1 to around
     * 36M by default (creating 90+1 buckets), which will give us timing resolution from microseconds to 36 seconds,
     * with less precision as the numbers get larger.
     *
     * Each bucket represents values from (previous bucket offset, current offset].
     */
    private final long[] bucketOffsets;

    // buckets is one element longer than bucketOffsets -- the last element is values greater than the last offset
    final AtomicLongArray buckets;

    public EstimatedHistogram() {
        this(90);
    }

    public EstimatedHistogram(int bucketCount) {
        bucketOffsets = newOffsets(bucketCount);
        buckets = new AtomicLongArray(bucketOffsets.length + 1);
    }

    public EstimatedHistogram(long[] offsets, long[] bucketData) {
        assert bucketData.length == offsets.length + 1;
        bucketOffsets = offsets;
        buckets = new AtomicLongArray(bucketData);
    }

    private static long[] newOffsets(int size) {
        long[] result = new long[size];
        long last = 1;
        result[0] = last;
        for (int i = 1; i < size; i++) {
            long next = Math.round(last * 1.2);
            if (next == last) {
                next++;
            }
            result[i] = next;
            last = next;
        }

        return result;
    }

    /**
     * @return the histogram values corresponding to each bucket index
     */
    public long[] getBucketOffsets() {
        return bucketOffsets;
    }

    /**
     * Increments the count of the bucket closest to n, rounding UP.
     */
    public void add(long n) {
        int index = Arrays.binarySearch(bucketOffsets, n);
        if (index < 0) {
            // inexact match, take the first bucket higher than n
            index = -index - 1;
        }
        // else exact match; we're good
        buckets.incrementAndGet(index);
    }

    /**
     * @return the count in the given bucket
     */
    long get(int bucket) {
        return buckets.get(bucket);
    }

    /**
     * @param reset zero out buckets afterwards if true
     * @return a long[] containing the current histogram buckets
     */
    public long[] getBuckets(boolean reset) {
        final int len = buckets.length();
        long[] rv = new long[len];

        if (reset) {
            for (int i = 0; i < len; i++) {
                rv[i] = buckets.getAndSet(i, 0L);
            }
        } else {
            for (int i = 0; i < len; i++) {
                rv[i] = buckets.get(i);
            }
        }

        return rv;
    }

    /**
     * @return the smallest value that could have been added to this histogram
     */
    public long min() {
        for (int i = 0; i < buckets.length(); i++) {
            if (buckets.get(i) > 0) {
                return i == 0 ? 0 : 1 + bucketOffsets[i - 1];
            }
        }
        return 0;
    }

    /**
     * @return the largest value that could have been added to this histogram.  If the histogram overflowed, returns
     * Long.MAX_VALUE.
     */
    public long max() {
        int lastBucket = buckets.length() - 1;
        if (buckets.get(lastBucket) > 0) {
            return Long.MAX_VALUE;
        }

        for (int i = lastBucket - 1; i >= 0; i--) {
            if (buckets.get(i) > 0) {
                return bucketOffsets[i];
            }
        }
        return 0;
    }

    /**
     * @return estimated value at given percentile
     */
    public long percentile(double percentile) {
        assert percentile >= 0 && percentile <= 1.0;
        int lastBucket = buckets.length() - 1;
        if (buckets.get(lastBucket) > 0) {
            throw new IllegalStateException("Unable to compute when histogram overflowed");
        }

        long pcount = (long) Math.floor(count() * percentile);
        if (pcount == 0) {
            return 0;
        }

        long elements = 0;
        for (int i = 0; i < lastBucket; i++) {
            elements += buckets.get(i);
            if (elements >= pcount) {
                return bucketOffsets[i];
            }
        }
        return 0;
    }

    /**
     * @return the mean histogram value (average of bucket offsets, weighted by count)
     * @throws IllegalStateException if any values were greater than the largest bucket threshold
     */
    public long mean() {
        int lastBucket = buckets.length() - 1;
        if (buckets.get(lastBucket) > 0) {
            throw new IllegalStateException("Unable to compute ceiling for max when histogram overflowed");
        }

        long elements = 0;
        long sum = 0;
        for (int i = 0; i < lastBucket; i++) {
            long bCount = buckets.get(i);
            elements += bCount;
            sum += bCount * bucketOffsets[i];
        }

        return (long) Math.ceil((double) sum / elements);
    }

    /**
     * @return the total number of non-zero values
     */
    public long count() {
        long sum = 0L;
        for (int i = 0; i < buckets.length(); i++) {
            sum += buckets.get(i);
        }
        return sum;
    }

    /**
     * @return true if this histogram has overflowed -- that is, a value larger than our largest bucket could bound was
     * added
     */
    public boolean isOverflowed() {
        return buckets.get(buckets.length() - 1) > 0;
    }

    /**
     * log.debug() every record in the histogram
     */
    public void log(Logger log) {
        // only print overflow if there is any
        int nameCount;
        if (buckets.get(buckets.length() - 1) == 0) {
            nameCount = buckets.length() - 1;
        } else {
            nameCount = buckets.length();
        }
        String[] names = new String[nameCount];

        int maxNameLength = 0;
        for (int i = 0; i < nameCount; i++) {
            names[i] = nameOfRange(bucketOffsets, i);
            maxNameLength = Math.max(maxNameLength, names[i].length());
        }

        // emit log records
        String formatstr = "%" + maxNameLength + "s: %d";
        for (int i = 0; i < nameCount; i++) {
            long count = buckets.get(i);
            // sort-of-hack to not print empty ranges at the start that are only used to demarcate the
            // first populated range. for code clarity we don't omit this record from the maxNameLength
            // calculation, and accept the unnecessary whitespace prefixes that will occasionally occur
            if (i == 0 && count == 0) {
                continue;
            }
            log.debug(String.format(formatstr, names[i], count));
        }
    }

    public String toString() {
        // only print overflow if there is any
        int nameCount;
        if (buckets.get(buckets.length() - 1) == 0) {
            nameCount = buckets.length() - 1;
        } else {
            nameCount = buckets.length();
        }
        String[] names = new String[nameCount];

        int maxNameLength = 0;
        for (int i = 0; i < nameCount; i++) {
            names[i] = nameOfRange(bucketOffsets, i);
            maxNameLength = Math.max(maxNameLength, names[i].length());
        }

        StringBuilder sb = new StringBuilder();

        // emit log records
        String formatstr = "%" + maxNameLength + "s: %d\n";
        for (int i = 0; i < nameCount; i++) {
            long count = buckets.get(i);
            // sort-of-hack to not print empty ranges at the start that are only used to demarcate the
            // first populated range. for code clarity we don't omit this record from the maxNameLength
            // calculation, and accept the unnecessary whitespace prefixes that will occasionally occur
            if (i == 0 && count == 0) {
                continue;
            }
            sb.append(String.format(formatstr, names[i], count));
        }

        return sb.toString();
    }

    private static String nameOfRange(long[] bucketOffsets, int index) {
        StringBuilder sb = new StringBuilder();
        appendRange(sb, bucketOffsets, index);
        return sb.toString();
    }

    private static void appendRange(StringBuilder sb, long[] bucketOffsets, int index) {
        sb.append("[");
        if (index == 0) {
            if (bucketOffsets[0] > 0)
            // by original definition, this histogram is for values greater than zero only;
            // if values of 0 or less are required, an entry of lb-1 must be inserted at the start
            {
                sb.append("1");
            } else {
                sb.append("-Inf");
            }
        } else {
            sb.append(bucketOffsets[index - 1] + 1);
        }
        sb.append("..");
        if (index == bucketOffsets.length) {
            sb.append("Inf");
        } else {
            sb.append(bucketOffsets[index]);
        }
        sb.append("]");
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }

        if (!(o instanceof EstimatedHistogram)) {
            return false;
        }

        EstimatedHistogram that = (EstimatedHistogram) o;
        return Arrays.equals(getBucketOffsets(), that.getBucketOffsets()) &&
               Arrays.equals(getBuckets(false), that.getBuckets(false));
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(getBucketOffsets(), getBuckets(false));
    }
}
